library(st514)
# IndlĂŚsning af data
data("T6-19")
T6.19 <- tbl
T6.19
## V1 V2 V3
## 1 271.0 18.50 F
## 2 477.0 82.50 F
## 3 306.3 23.40 F
## 4 365.3 33.50 F
## 5 466.0 69.00 F
## 6 440.7 54.00 F
## 7 315.0 24.97 F
## 8 417.5 56.75 F
## 9 307.3 23.15 F
## 10 319.0 29.51 F
## 11 303.9 19.98 F
## 12 331.7 24.00 F
## 13 435.0 70.37 F
## 14 261.3 15.50 F
## 15 384.8 63.00 F
## 16 360.3 39.00 F
## 17 441.4 53.00 F
## 18 246.7 15.75 F
## 19 365.3 44.00 F
## 20 336.8 30.00 F
## 21 326.7 34.00 F
## 22 312.0 25.00 F
## 23 226.7 9.25 F
## 24 347.4 30.00 F
## 25 280.2 15.25 F
## 26 290.7 21.50 F
## 27 438.6 57.00 F
## 28 377.1 61.50 F
## 29 176.7 3.00 M
## 30 259.5 9.75 M
## 31 258.0 10.07 M
## 32 229.8 7.50 M
## 33 233.0 6.25 M
## 34 237.5 9.85 M
## 35 268.3 10.00 M
## 36 222.5 9.00 M
## 37 186.5 3.75 M
## 38 238.8 9.75 M
## 39 257.6 9.75 M
## 40 172.0 3.00 M
## 41 244.7 10.00 M
## 42 224.7 7.25 M
## 43 231.7 9.25 M
## 44 235.9 7.50 M
## 45 236.5 5.75 M
## 46 247.4 7.75 M
## 47 223.0 5.75 M
## 48 223.7 5.75 M
## 49 212.5 7.65 M
## 50 223.2 7.75 M
## 51 225.0 5.84 M
## 52 228.0 7.53 M
## 53 215.6 5.75 M
## 54 221.0 6.45 M
## 55 236.7 6.49 M
## 56 235.3 6.00 M
colnames(T6.19) <- c("Snout Vent Length (cm)", "Weight (kg)", "Gender")
levels(T6.19$Gender) <- c("Female", "Male")
T6.19
## Snout Vent Length (cm) Weight (kg) Gender
## 1 271.0 18.50 Female
## 2 477.0 82.50 Female
## 3 306.3 23.40 Female
## 4 365.3 33.50 Female
## 5 466.0 69.00 Female
## 6 440.7 54.00 Female
## 7 315.0 24.97 Female
## 8 417.5 56.75 Female
## 9 307.3 23.15 Female
## 10 319.0 29.51 Female
## 11 303.9 19.98 Female
## 12 331.7 24.00 Female
## 13 435.0 70.37 Female
## 14 261.3 15.50 Female
## 15 384.8 63.00 Female
## 16 360.3 39.00 Female
## 17 441.4 53.00 Female
## 18 246.7 15.75 Female
## 19 365.3 44.00 Female
## 20 336.8 30.00 Female
## 21 326.7 34.00 Female
## 22 312.0 25.00 Female
## 23 226.7 9.25 Female
## 24 347.4 30.00 Female
## 25 280.2 15.25 Female
## 26 290.7 21.50 Female
## 27 438.6 57.00 Female
## 28 377.1 61.50 Female
## 29 176.7 3.00 Male
## 30 259.5 9.75 Male
## 31 258.0 10.07 Male
## 32 229.8 7.50 Male
## 33 233.0 6.25 Male
## 34 237.5 9.85 Male
## 35 268.3 10.00 Male
## 36 222.5 9.00 Male
## 37 186.5 3.75 Male
## 38 238.8 9.75 Male
## 39 257.6 9.75 Male
## 40 172.0 3.00 Male
## 41 244.7 10.00 Male
## 42 224.7 7.25 Male
## 43 231.7 9.25 Male
## 44 235.9 7.50 Male
## 45 236.5 5.75 Male
## 46 247.4 7.75 Male
## 47 223.0 5.75 Male
## 48 223.7 5.75 Male
## 49 212.5 7.65 Male
## 50 223.2 7.75 Male
## 51 225.0 5.84 Male
## 52 228.0 7.53 Male
## 53 215.6 5.75 Male
## 54 221.0 6.45 Male
## 55 236.7 6.49 Male
## 56 235.3 6.00 Male
mean(T6.19$"Snout Vent Length (cm)")
## [1] 288.5143
mean(T6.19$"Weight (kg)")
## [1] 22.27696
# Variance of x1 (using n divisor)
var(T6.19$"Snout Vent Length (cm)") * (nrow(T6.19) - 1) / nrow(T6.19)
## [1] 6084.646
# Variance of x2 (using n divisor)
var(T6.19$"Weight (kg)") * (nrow(T6.19) - 1) / nrow(T6.19)
## [1] 421.5405
# Covariance between x1 and x2 (using n divisor)
cov(T6.19$"Snout Vent Length (cm)", T6.19$"Weight (kg)") * (nrow(T6.19) - 1) / nrow(T6.19)
## [1] 1539.699
# Correlation between x1 and x2
cor(T6.19$"Snout Vent Length (cm)", T6.19$"Weight (kg)")
## [1] 0.9613875
# Create a scatterplot with a marginal dot plot for the variables x1 and x2
marginal.dot.plot(T6.19$"Snout Vent Length (cm)", T6.19$"Weight (kg)", xlab = expression("Snout Vent Length (cm) x"[1]), ylab = expression("Weight (kg) x"[2]))
# Variance-covariance matrix:
#create t6.19 nuerical withouht column gender
T6.19_numerical <- T6.19[, 1:2]
#print(round(cov(T6.19_numerical) * (nrow(T6.19) - 1) / nrow(T6.19)),3)
print(round(cov(T6.19_numerical) * (nrow(T6.19) - 1) / nrow(T6.19), digits=3))
## Snout Vent Length (cm) Weight (kg)
## Snout Vent Length (cm) 6084.646 1539.699
## Weight (kg) 1539.699 421.540
# Find standardafvigelser (kvadratrod af diagonale vĂŚrdier i varians-kovarians-matrixen)
vcov_matrix <- cov(T6.19_numerical) * (nrow(T6.19) - 1) / nrow(T6.19)
# TrĂŚk diagonalen ud (varians for hver variabel), og tag kvadratroden
std_devs <- sqrt(diag(vcov_matrix))
# Udskriv med beskrivende tekst
cat("Standardafvigelser (âvarians):\n")
## Standardafvigelser (âvarians):
print(round(std_devs, 2))
## Snout Vent Length (cm) Weight (kg)
## 78.00 20.53
# Dimensioner
dim(T6.19)
## [1] 56 3
56 observations and 3 variables
#Variabeltyper
str(T6.19)
## 'data.frame': 56 obs. of 3 variables:
## $ Snout Vent Length (cm): num 271 477 306 365 466 ...
## $ Weight (kg) : num 18.5 82.5 23.4 33.5 69 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 1 ...
#Sammendrag
summary(T6.19)
## Snout Vent Length (cm) Weight (kg) Gender
## Min. :172.0 Min. : 3.00 Female:28
## 1st Qu.:229.3 1st Qu.: 7.50 Male :28
## Median :258.8 Median :10.04
## Mean :288.5 Mean :22.28
## 3rd Qu.:333.0 3rd Qu.:30.00
## Max. :477.0 Max. :82.50
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Tilføj et unikt ID til hver observation
T6.19_obs <- T6.19 %>%
mutate(Obs = paste0("Obs", row_number()))
# Gør dataen lang for Snout og Weight
T6.19_long <- T6.19_obs %>%
dplyr::select(`Obs`, `Gender`, `Snout Vent Length (cm)`, `Weight (kg)`) %>%
pivot_longer(cols = c(`Snout Vent Length (cm)`, `Weight (kg)`),
names_to = "Variable", values_to = "Value")
# Standardiser variabelnavne lidt for pĂŚnere labels
library(stringr)
T6.19_long$Variable <- str_replace_all(T6.19_long$Variable,
c("Snout Vent Length \\(cm\\)" = "Snout Length",
"Weight \\(kg\\)" = "Weight"))
# Plot: Hver søjle er Ên observation
ggplot(T6.19_long, aes(x = Obs, y = Value, fill = Gender)) +
geom_bar(stat = "identity") +
facet_wrap(~ Variable, scales = "free_y") +
labs(
title = "Snout Length og Weight for alle observationer",
x = "Observationer", y = "VĂŚrdi"
) +
scale_fill_manual(values = c("Female" = "#F8766D", "Male" = "#619CFF")) +
theme_minimal() +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)
# Tilføj numerisk ID og behold som karakter for rÌkkefølge
T6.19_obs <- T6.19 %>%
mutate(Obs = row_number())
# Gør data lang
T6.19_long <- T6.19_obs %>%
dplyr::select(Obs, Gender, `Snout Vent Length (cm)`, `Weight (kg)`) %>%
pivot_longer(cols = c(`Snout Vent Length (cm)`, `Weight (kg)`),
names_to = "Variable", values_to = "Value")
# Standardiser navne
T6.19_long$Variable <- str_replace_all(T6.19_long$Variable,
c("Snout Vent Length \\(cm\\)" = "Snout Length",
"Weight \\(kg\\)" = "Weight"))
# Gør Obs til faktor for rÌkkefølge
T6.19_long$Obs <- factor(T6.19_long$Obs, levels = 1:56)
# Kombiner observation og variabel til unik bar (bruges ikke i plot her men gemmes til evt. brug)
T6.19_long$ObsVar <- interaction(T6.19_long$Obs, T6.19_long$Variable, sep = "_")
# Plot begge variabler som separate søjler i Êt diagram
ggplot(T6.19_long, aes(x = Obs, y = Value, fill = Gender)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.9), aes(group = Variable)) +
labs(
title = "Snout Length og Weight pr. observation",
x = "Observation", y = "VĂŚrdi"
) +
scale_fill_manual(values = c("Female" = "#F8766D", "Male" = "#619CFF")) +
theme_minimal()
library(dplyr)
library(tidyr)
library(ggplot2)
# Beregn gennemsnit for hver kombination af variabel og køn
T6.19_means <- T6.19 %>%
group_by(Gender) %>%
summarise(
Snout = mean(`Snout Vent Length (cm)`),
Weight = mean(`Weight (kg)`)
) %>%
pivot_longer(cols = c(Snout, Weight), names_to = "Variable", values_to = "Mean")
# Plot
ggplot(T6.19_means, aes(x = Gender, y = Mean, fill = Variable)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
title = "Gennemsnitlig Snout Length og Weight fordelt pü køn",
x = "Køn", y = "Gennemsnitlig vÌrdi"
) +
scale_fill_manual(values = c("Snout" = "#619CFF", "Weight" = "#F8766D")) +
theme_minimal()
# Scatterplot of length and weight given gender=female
## Scatterplot: Females only
T6.19_female <- subset(T6.19, Gender == "Female")
plot(T6.19_female$`Snout Vent Length (cm)`, T6.19_female$`Weight (kg)`,
xlab = expression("Snout Vent Length (cm) x"[1]),
ylab = expression("Weight (kg) x"[2]),
main = "Scatterplot of Female Anacondas",
pch = 19, col = "darkred")
## Scatterplot: Males only
T6.19_male <- subset(T6.19, Gender == "Male")
plot(T6.19_male$`Snout Vent Length (cm)`, T6.19_male$`Weight (kg)`,
xlab = expression("Snout Vent Length (cm) x"[1]),
ylab = expression("Weight (kg) x"[2]),
main = "Scatterplot of Male Anacondas",
pch = 19, col = "darkblue")
plot(T6.19$`Snout Vent Length (cm)`, T6.19$`Weight (kg)`,
xlab = expression("Snout Vent Length (cm) x"[1]),
ylab = expression("Weight (kg) x"[2]),
main = "Scatterplot by Gender",
pch = 19,
col = ifelse(T6.19$Gender == "Female", "darkred", "darkblue"))
legend("topleft", legend = c("Female", "Male"),
col = c("darkred", "darkblue"), pch = 19)
#Mean vector
print(colMeans(T6.19_numerical, na.rm = TRUE), digits = 5)
## Snout Vent Length (cm) Weight (kg)
## 288.514 22.277
# colMeans of T6.19 where gender=female
# Column means for females
T6.19_female <- subset(T6.19, Gender == "Female")
T6.19_female_numerical <- T6.19_female[, 1:2]
print(colMeans(T6.19_female_numerical, na.rm = TRUE), digits = 5)
## Snout Vent Length (cm) Weight (kg)
## 348.275 37.264
# colMeans of T6.19 where gender=male
# Column means for males
T6.19_male <- subset(T6.19, Gender == "Male")
T6.19_male_numerical <- T6.19_male[, 1:2]
print(colMeans(T6.19_male_numerical, na.rm = TRUE), digits = 5)
## Snout Vent Length (cm) Weight (kg)
## 228.7536 7.2904
#correlation matrix
print(cor(T6.19_numerical))
## Snout Vent Length (cm) Weight (kg)
## Snout Vent Length (cm) 1.0000000 0.9613875
## Weight (kg) 0.9613875 1.0000000
pairs(T6.19[, 1:2],
main = "Scatterplot Matrix: Length and Weight",
pch = 21,
bg = c("lightpink", "lightblue")[T6.19$Gender])
#pairs(T6.19_numerical, diag.panel = panel.boxplot, labels = "")
pairs(T6.19_numerical,
diag.panel = panel.boxplot,
labels = c("Snout Length", "Weight"),
main = "Scatterplot Matrix: Length and Weight")
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
# Calculate correlation coefficient
cat("Correlation coefficient:", cor(T6.19$"Snout Vent Length (cm)", T6.19$"Weight (kg)"), "\n")
## Correlation coefficient: 0.9613875
# Calculate correlation coefficient
cat("Correlation coefficient:", cor(T6.19$"Weight (kg)", T6.19$"Snout Vent Length (cm)"), "\n")
## Correlation coefficient: 0.9613875
# Plot af data
plot(T6.19_numerical,
xlim = c(0, 500),
ylim = c(0, 100),
main = "Scatterplot med stikprøvegennemsnit",
xlab = "Snout Vent Length (cm) x1", ylab = "Weight (kg) x2", pch = 16)
grid()
# Beregning af stikprøvegennemsnit
x_bar <- colMeans(T6.19_numerical)
#x_bar
# Tilføj stikprøvegennemsnit til plot
points(x_bar[1], x_bar[2], pch = "X", col = "red", cex = 2)
# Tilføj en forklaring
legend("topleft",
legend = c("56 Observationer", "Stikprøvegennemsnit"),
pch = c(16, 4),
col = c("black", "red"),
pt.lwd = c(1, 3))
# Vis stikprøvegennemsnittet
cat("Stikprøvegennemsnittet er:", x_bar, "\n")
## Stikprøvegennemsnittet er: 288.5143 22.27696
# UdtrĂŚk data
Xf <- T6.19[T6.19$Gender == "Female", 1:2]
Xm <- T6.19[T6.19$Gender == "Male", 1:2]
n1 <- nrow(Xf); n2 <- nrow(Xm)
xbar1 <- colMeans(Xf)
xbar2 <- colMeans(Xm)
diff_mean <- xbar1 - xbar2
# Pooled kovariansmatrix
S1 <- cov(Xf); S2 <- cov(Xm)
Spooled <- ((n1 - 1)*S1 + (n2 - 1)*S2)/(n1 + n2 - 2)
# EgenvĂŚrdier og egenvektorer
eig <- eigen(Spooled)
lambda <- eig$values # egenvĂŚrdier
vectors <- eig$vectors # egenvektorer
# Parametre for ellipse
alpha <- 0.05
p <- 2
n <- (n1 * n2) / (n1 + n2)
T2_crit <- ((n1 + n2 - 2) * p / (n1 + n2 - p - 1)) * qf(1 - alpha, p, n1 + n2 - p - 1)
# GenerĂŠr ellipsepunkter
theta <- seq(0, 2*pi, length = 100)
circle <- rbind(cos(theta), sin(theta))
scaling <- sqrt(T2_crit / n) * sqrt(lambda)
ellipse_points <- t(vectors %*% diag(scaling) %*% circle + diff_mean)
# Plot
plot(ellipse_points, type = "l", asp = 1, lwd = 2,
xlab = "Snout Vent Length (cm)", ylab = "Weight (kg)",
main = "Hotellingâs T² Ellipse (uden ellipse-pakke)")
points(0, 0, pch = 4, col = "red")
text(0, 0, "0", pos = 1, cex = 0.8)
points(diff_mean[1], diff_mean[2], pch = 19, col = "blue")
text(diff_mean[1], diff_mean[2], "M1 - M2", pos = 4, cex = 0.8)
# Beregn generaliseret stikprøvevarians for Spooled matrix
Xf <- T6.19[T6.19$Gender == "Female", 1:2]
Xm <- T6.19[T6.19$Gender == "Male", 1:2]
n1 <- nrow(Xf); n2 <- nrow(Xm)
S1 <- cov(Xf); S2 <- cov(Xm)
Spooled <- ((n1 - 1)*S1 + (n2 - 1)*S2)/(n1 + n2 - 2)
# Determinant = generaliseret varians
gen_var <- det(Spooled)
cat("Generaliseret stikprøvevarians (|S|):", round(gen_var, 2))
## Generaliseret stikprøvevarians (|S|): 86170.38
1.1 Univariate normalitetstest (marginal):
Brug QâQ plot for hver af de to variable (Snout Vent Length og Weight).
F.eks. som i Ăvelse 5.4.B.
1.2 Multivariat normalitetstest:
Brug QâQ plot for Mahalanobis afstande mod kvantilfunktion for Ď² (chi-square plot).
F.eks. baseret pĂĽ procedure i bogens Kapitel 4, Ex. 4.13 og Resultat 4.7
# Lav Q-Q plots for hver af de tre variable
par(mfrow = c(2, 2))
for (i in 1:p) {
qqnorm(T6.19_numerical[,i], main = colnames(T6.19_numerical)[i])
qqline(T6.19_numerical[,i])
}
# Beregn Mahalanobis-afstandene
Sx <- cov(T6.19_numerical)
D2 <- mahalanobis(T6.19_numerical, colMeans(T6.19_numerical), Sx)
# Lav et chi-i-anden Q-Q plot for multivariate normalitet
qqplot(qchisq(ppoints(n, a = 0.5), df = p), D2,
ylab = "Mahalanobis afstande",
xlab = bquote("Kvantiler af " ~ chi[.(p)]^2),
main = bquote("Q-Q plot af Mahalanobis" * ~ D^2 *
" vs. kvantiler af" * ~ chi[.(p)]^2))
abline(0, 1)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(car) # til powerTransform
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Antal observationer og dimensioner
n <- nrow(T6.19_numerical)
p <- ncol(T6.19_numerical)
# Box-Cox transformationer
pt_result <- powerTransform(T6.19_numerical)
lambda_vec <- pt_result$lambda
print(lambda_vec)
## Snout Vent Length (cm) Weight (kg)
## -0.49978201 -0.03326786
# Transformer hver variabel
T6.19_transformed <- as.data.frame(
sapply(1:p, function(i) {
lambda <- lambda_vec[i]
x <- T6.19_numerical[, i]
if (lambda == 0) log(x) else (x^lambda - 1) / lambda
})
)
colnames(T6.19_transformed) <- colnames(T6.19_numerical)
# Plot QQ-plots for de transformerede variable
par(mfrow = c(2, 2))
for (i in 1:p) {
qqnorm(T6.19_transformed[, i],
main = paste("Q-Q plot (transformeret):", colnames(T6.19_transformed)[i]))
qqline(T6.19_transformed[, i])
}
# Mahalanobis-afstande (baseret pĂĽ transformerede data)
Sx <- cov(T6.19_transformed)
D2 <- mahalanobis(T6.19_transformed, colMeans(T6.19_transformed), Sx)
# Q-Q plot af Mahalanobis afstande vs. chi^2
qqplot(qchisq(ppoints(n, a = 0.5), df = p), D2,
ylab = expression("Mahalanobis afstande"),
xlab = bquote("Kvantiler af " ~ chi[.(p)]^2),
main = bquote("Q-Q plot af Mahalanobis" * ~ D^2 * " (transformeret)"))
abline(0, 1)
.
# Univariate t-tests for Snout Vent Length og Weight mod Gender
# Antag at køn er korrekt kodet som faktor med levels: Female, Male
T6.19$Gender <- factor(T6.19$Gender, levels = c("Female", "Male"))
# T-test for Snout Vent Length (cm) mellem køn
t_snout <- t.test(`Snout Vent Length (cm)` ~ Gender, data = T6.19, var.equal = TRUE)
#var.equal = TRUE bruges her i trüd med forudsÌtningen fra tidligere øvelser, hvor vi antager lig varians ved sammenligninger af grupper.
print(t_snout)
##
## Two Sample t-test
##
## data: Snout Vent Length (cm) by Gender
## t = 8.7597, df = 54, p-value = 0.000000000005976
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
## 92.16599 146.87687
## sample estimates:
## mean in group Female mean in group Male
## 348.2750 228.7536
â Korrekt fortolkning:
Hâ (nulhypotese): Der er ingen forskel i gennemsnitlig snout length mellem han- og hun-anacondaer.
Hâ (alternativ hypotese): Der er en forskel i gennemsnitlig snout length mellem kønnene.
đ Resultat:
P-vĂŚrdien er ekstremt lille (â 5.98e-12), dvs. langt under den typiske signifikansgrĂŚnse (Îą = 0.05).
Derfor forkaster vi Hâ: Der er statistisk evidens for, at snout length afhĂŚnger af køn.
Konfidensintervallet [92.17, 146.88][92.17, 146.88] indeholder ikke nul, hvilket styrker denne konklusion.
Hun-anacondaer (â) har i gennemsnit ca. 120 cm lĂŚngere snout end hanner (â).
â Korrigeret fortolkning:
"Da p-vĂŚrdien er meget lav (<< 0.05), forkastes Hâ. Der er altsĂĽ en statistisk signifikant forskel i snout length mellem hun- og han-anacondaer. Det tyder pĂĽ, at snout length afhĂŚnger af køn."
đĽ OBS: Din formulering âH0 hypotesen ikke forkastes baseret pĂĽ lav p-vĂŚrdiâ er forkert â lav p-vĂŚrdi betyder netop, at vi forkaster Hâ.
# T-test for Weight (kg) mellem køn
t_weight <- t.test(`Weight (kg)` ~ Gender, data = T6.19, var.equal = TRUE)
#var.equal = TRUE bruges her i trüd med forudsÌtningen fra tidligere øvelser, hvor vi antager lig varians ved sammenligninger af grupper.
print(t_weight)
##
## Two Sample t-test
##
## data: Weight (kg) by Gender
## t = 7.8475, df = 54, p-value = 0.0000000001739
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
## 22.31565 37.63078
## sample estimates:
## mean in group Female mean in group Male
## 37.263571 7.290357
â Korrekt fortolkning:
Hâ (nulhypotese): Der er ingen forskel i gennemsnitlig vĂŚgt mellem kønnene.
Hâ (alternativ hypotese): Der er forskel i gennemsnitlig vĂŚgt mellem kønnene.
đ Resultat:
P-vĂŚrdien er ekstremt lav (â 1.74e-10), altsĂĽ klart under 0.05 â forkast Hâ.
Der er statistisk belÌg for, at køn har stor betydning for vÌgt.
Konfidensintervallet viser, at hunner vejer 22â38 kg mere end hanner i gennemsnit.
â Korrigeret fortolkning:
"P-vĂŚrdien er meget lav, hvilket betyder, at vi forkaster Hâ. Der er statistisk signifikant forskel i vĂŚgt mellem køn. VĂŚgten afhĂŚnger af køn, og hun-anacondaer vejer gennemsnitligt betydeligt mere."
(Sammenfatning)
â Samlet konklusion:
Test p-vĂŚrdi Hâ forkastes? Fortolkning Snout Length ~ Gender 5.98e-12 Ja Snout length afhĂŚnger af køn Weight ~ Gender 1.74e-10 Ja VĂŚgt afhĂŚnger af køn
đ Tabel til posteren (Univariate T-tests)
| Variabel | MiddelvĂŚrdi (â) | MiddelvĂŚrdi (â) | T-vĂŚrdi | P-vĂŚrdi | 95% CI for forskel | Konklusion |
|---|---|---|---|---|---|---|
| Snout Length (cm) | 348.28 | 228.75 | 8.76 | < 0.000000000006 | [92.17, 146.88] | Signifikant forskel. AfhÌnger af køn. |
| Weight (kg) | 37.26 | 7.29 | 7.85 | < 0.000000000174 | [22.32, 37.63] | Signifikant forskel. AfhÌnger af køn. |
đ§ž Noter til posteren (fortolkning og forklaring):
Univariate T-tests blev udført for hver af de to variable:
Snout Length (cm) og Weight (kg) fordelt pĂĽ køn (â vs â).
Begge tests viser ekstremt lave p-vĂŚrdier (<< 0.05), hvilket betyder, at vi forkaster nulhypotesen.
đ˘ Fortolkning:
Der er statistisk signifikant forskel i bĂĽde lĂŚngde og vĂŚgt mellem hun- og han-anacondaer.
Hun-anacondaer har i gennemsnit 120 cm lÌngere snout og 30 kg højere vÌgt end hanner.
Disse forskelle er ikke blot statistisk signifikante, men ogsĂĽ biologisk relevante.
đŚ Boxplots for Snout Length og Weight fordelt pĂĽ køn
library(ggplot2)
library(tidyr)
library(dplyr)
# Gør data lang
T6.19_long <- T6.19 %>%
dplyr::select(Gender, `Snout Vent Length (cm)`, `Weight (kg)`) %>%
pivot_longer(cols = -Gender, names_to = "Variable", values_to = "Value")
# Standardiser labels
T6.19_long$Variable <- gsub("Snout Vent Length \\(cm\\)", "Snout Length", T6.19_long$Variable)
T6.19_long$Variable <- gsub("Weight \\(kg\\)", "Weight", T6.19_long$Variable)
# Boxplot
ggplot(T6.19_long, aes(x = Gender, y = Value, fill = Gender)) +
geom_boxplot(alpha = 0.7) +
facet_wrap(~ Variable, scales = "free_y") +
labs(
title = "Boxplots af Snout Length og Weight fordelt pü køn",
x = "Køn", y = "VÌrdi"
) +
scale_fill_manual(values = c("Female" = "#F8766D", "Male" = "#619CFF")) +
theme_minimal()
# Violinplot
ggplot(T6.19_long, aes(x = Gender, y = Value, fill = Gender)) +
geom_violin(trim = FALSE, alpha = 0.6, bw = 10) + # Juster bw (bandwidth)
geom_boxplot(width = 0.1, fill = "white", outlier.shape = NA) +
facet_wrap(~ Variable, scales = "free_y") +
labs(
title = "Violinplots af Snout Length og Weight fordelt pü køn",
x = "Køn", y = "VÌrdi"
) +
scale_fill_manual(values = c("Female" = "#F8766D", "Male" = "#619CFF")) +
theme_minimal()
ggplot(T6.19_long, aes(x = Gender, y = Value, fill = Gender)) +
geom_violin(trim = FALSE, scale = "count", alpha = 0.6) +
geom_boxplot(width = 0.1, fill = "white", outlier.shape = NA) +
facet_wrap(~ Variable, scales = "free_y") +
labs(
title = "Violinplots af Snout Length og Weight fordelt pü køn",
x = "Køn", y = "VÌrdi"
) +
scale_fill_manual(values = c("Female" = "#F8766D", "Male" = "#619CFF")) +
theme_minimal()
# Violinplot: Snout Length
ggplot(T6.19, aes(x = Gender, y = `Snout Vent Length (cm)`, fill = Gender)) +
geom_violin(trim = FALSE, alpha = 0.6) +
geom_boxplot(width = 0.1, fill = "white", outlier.shape = NA) +
labs(
title = "Violin Plot: Snout Length by Gender",
x = "Gender", y = "Snout Length (cm)"
) +
scale_fill_manual(values = c("Female" = "#F8766D", "Male" = "#619CFF")) +
theme_minimal()
# Violinplot: Weight
ggplot(T6.19, aes(x = Gender, y = `Weight (kg)`, fill = Gender)) +
geom_violin(trim = FALSE, alpha = 0.6) +
geom_boxplot(width = 0.1, fill = "white", outlier.shape = NA) +
labs(
title = "Violin Plot: Weight by Gender",
x = "Gender", y = "Weight (kg)"
) +
scale_fill_manual(values = c("Female" = "#F8766D", "Male" = "#619CFF")) +
theme_minimal()
# Pakker
library(ggplot2)
library(dplyr)
# Plot 1: Snout Length by Gender
ggplot(T6.19, aes(x = Gender, y = `Snout Vent Length (cm)`, fill = Gender)) +
geom_violin(trim = FALSE, alpha = 0.6) +
geom_boxplot(width = 0.1, fill = "white", outlier.shape = NA) +
labs(
title = "Violin Plot: Snout Length by Gender",
x = "Gender",
y = "Snout Length (cm)"
) +
scale_fill_manual(values = c("Female" = "#F8766D", "Male" = "#619CFF")) +
theme_minimal()
# Plot 2: Weight by Gender
ggplot(T6.19, aes(x = Gender, y = `Weight (kg)`, fill = Gender)) +
geom_violin(trim = FALSE, alpha = 0.6) +
geom_boxplot(width = 0.1, fill = "white", outlier.shape = NA) +
labs(
title = "Violin Plot: Weight by Gender",
x = "Gender",
y = "Weight (kg)"
) +
scale_fill_manual(values = c("Female" = "#F8766D", "Male" = "#619CFF")) +
theme_minimal()
# UdvÌlg de første 3 observationer (uanset køn)
X <- as.matrix(T6.19[1:3, 1:2]) # 3x2 matrix (Snout, Weight)
# Beregn middelvektor (kolonnegennemsnit)
x_bar <- colMeans(X)
# Lav 1-vektor
Ones <- rep(1, nrow(X))
# Beregn afvigelsesmatrix: Y = X - 1 * x_bar^T
Y <- X - Ones %*% t(x_bar)
# UdtrĂŚk kolonnerne som vektorer
Y1 <- Y[,1] # Snout
Y2 <- Y[,2] # Weight
# Beregn lĂŚngder
Ly1 <- sqrt(sum(Y1^2))
Ly2 <- sqrt(sum(Y2^2))
# Beregn cosinus og vinkel
costh <- sum(Y1 * Y2) / (Ly1 * Ly2)
ang <- acos(costh) * 180 / pi
# Varians, kovarians og korrelation
s11 <- sum(Y1^2) / nrow(X)
s22 <- sum(Y2^2) / nrow(X)
s12 <- sum(Y1 * Y2) / nrow(X)
r12 <- s12 / sqrt(s11 * s22)
# Print resultater
cat("Afvigelsesvektorer for Anaconda-data (første 3 rÌkker):\n\n")
## Afvigelsesvektorer for Anaconda-data (første 3 rÌkker):
cat("LĂŚngde af afvigelsesvektor Snout:", Ly1, "\n")
## LĂŚngde af afvigelsesvektor Snout: 155.7996
cat("LĂŚngde af afvigelsesvektor Weight:", Ly2, "\n")
## LĂŚngde af afvigelsesvektor Weight: 50.37466
cat("Cosinus til vinkel mellem vektorer:", costh, "\n")
## Cosinus til vinkel mellem vektorer: 0.9957646
cat("Vinkel mellem vektorer (grader):", ang, "\n\n")
## Vinkel mellem vektorer (grader): 5.275185
cat("Varians af Snout:", s11, "\n")
## Varians af Snout: 8091.176
cat("Varians af Weight:", s22, "\n")
## Varians af Weight: 845.8689
cat("Kovarians:", s12, "\n")
## Kovarians: 2605.038
cat("Korrelationskoefficient:", r12, "\n")
## Korrelationskoefficient: 0.9957646
# UdtrÌk de første 3 observationer (Snout og Weight)
X <- as.matrix(T6.19[1:3, 1:2])
x_bar <- colMeans(X)
Ones <- rep(1, 3)
# Afvigelsesmatrix: Y = X - 1 * x_bar^T
Y <- X - Ones %*% t(x_bar)
# UdtrĂŚk kolonner
Y1 <- Y[,1] # Snout
Y2 <- Y[,2] # Weight
# Plot setup
plot(NA, xlim = range(c(0, Y1)), ylim = range(c(0, Y2)),
xlab = "Snout deviation", ylab = "Weight deviation",
main = "Afvigelsesvektorer fra origo (2D)", asp = 1)
grid()
# Tegn vektorer fra origo (0,0)
arrows(0, 0, Y1[1], Y2[1], col = "red", lwd = 2)
arrows(0, 0, Y1[2], Y2[2], col = "blue", lwd = 2)
arrows(0, 0, Y1[3], Y2[3], col = "darkgreen", lwd = 2)
# Tilføj labels
text(Y1[1], Y2[1], labels = "Y1", pos = 4, col = "red")
text(Y1[2], Y2[2], labels = "Y2", pos = 4, col = "blue")
text(Y1[3], Y2[3], labels = "Y3", pos = 4, col = "darkgreen")
# UdtrÌk de første 5 observationer for hver køn
T6.19_5females <- T6.19[T6.19$Gender == "Female", ][1:5, 1:2]
T6.19_5males <- T6.19[T6.19$Gender == "Male", ][1:5, 1:2]
# Kombiner i ĂŠn matrix (2 rĂŚkker, 5 kolonner pr variabel)
snout_matrix <- rbind(
T6.19_5males$`Snout Vent Length (cm)`,
T6.19_5females$`Snout Vent Length (cm)`
)
rownames(snout_matrix) <- c("Male", "Female")
weight_matrix <- rbind(
T6.19_5males$`Weight (kg)`,
T6.19_5females$`Weight (kg)`
)
rownames(weight_matrix) <- c("Male", "Female")
# Midler
snout_mean <- matrix(rep(mean(snout_matrix), 10), nrow = 2, byrow = TRUE)
rownames(snout_mean) <- c("Male", "Female")
weight_mean <- matrix(rep(mean(weight_matrix), 10), nrow = 2, byrow = TRUE)
rownames(weight_mean) <- c("Male", "Female")
# Treatment effect
snout_group_means <- matrix(c(rep(mean(snout_matrix[1, ]), 5),
rep(mean(snout_matrix[2, ]), 5)),
nrow = 2, byrow = TRUE)
rownames(snout_group_means) <- c("Male", "Female")
snout_treatment <- snout_group_means - snout_mean
rownames(snout_treatment) <- c("Male", "Female")
weight_group_means <- matrix(c(rep(mean(weight_matrix[1, ]), 5),
rep(mean(weight_matrix[2, ]), 5)),
nrow = 2, byrow = TRUE)
rownames(weight_group_means) <- c("Male", "Female")
weight_treatment <- weight_group_means - weight_mean
rownames(weight_treatment) <- c("Male", "Female")
# Residual
snout_residual <- snout_matrix - snout_group_means
rownames(snout_residual) <- c("Male", "Female")
weight_residual <- weight_matrix - weight_group_means
rownames(weight_residual) <- c("Male", "Female")
# Vis resultat for Snout
cat("Snout Vent Length:\n\n")
## Snout Vent Length:
cat("Original data:\n"); print(snout_matrix)
## Original data:
## [,1] [,2] [,3] [,4] [,5]
## Male 176.7 259.5 258.0 229.8 233
## Female 271.0 477.0 306.3 365.3 466
cat("\nMean component:\n"); print(snout_mean)
##
## Mean component:
## [,1] [,2] [,3] [,4] [,5]
## Male 304.26 304.26 304.26 304.26 304.26
## Female 304.26 304.26 304.26 304.26 304.26
cat("\nTreatment effect:\n"); print(snout_treatment)
##
## Treatment effect:
## [,1] [,2] [,3] [,4] [,5]
## Male -72.86 -72.86 -72.86 -72.86 -72.86
## Female 72.86 72.86 72.86 72.86 72.86
cat("\nResidual:\n"); print(snout_residual)
##
## Residual:
## [,1] [,2] [,3] [,4] [,5]
## Male -54.70 28.10 26.60 -1.60 1.60
## Female -106.12 99.88 -70.82 -11.82 88.88
# Vis resultat for Weight
cat("\n\nWeight:\n\n")
##
##
## Weight:
cat("Original data:\n"); print(weight_matrix)
## Original data:
## [,1] [,2] [,3] [,4] [,5]
## Male 3.0 9.75 10.07 7.5 6.25
## Female 18.5 82.50 23.40 33.5 69.00
cat("\nMean component:\n"); print(weight_mean)
##
## Mean component:
## [,1] [,2] [,3] [,4] [,5]
## Male 26.347 26.347 26.347 26.347 26.347
## Female 26.347 26.347 26.347 26.347 26.347
cat("\nTreatment effect:\n"); print(weight_treatment)
##
## Treatment effect:
## [,1] [,2] [,3] [,4] [,5]
## Male -19.033 -19.033 -19.033 -19.033 -19.033
## Female 19.033 19.033 19.033 19.033 19.033
cat("\nResidual:\n"); print(weight_residual)
##
## Residual:
## [,1] [,2] [,3] [,4] [,5]
## Male -4.314 2.436 2.756 0.186 -1.064
## Female -26.880 37.120 -21.980 -11.880 23.620
# Som plot
library(tidyr)
library(dplyr)
library(ggplot2)
# ID og køn
ID <- paste0("Obs", 1:10)
Gender <- c(rep("Male", 5), rep("Female", 5))
# RÌkkefølgebevarende faktor
ID <- factor(ID, levels = paste0("Obs", 1:10))
# ----- Snout -----
df_snout <- data.frame(
ID = ID,
Gender = Gender,
Mean = as.vector(snout_mean),
Treatment = as.vector(snout_treatment),
Residual = as.vector(snout_residual)
) %>%
pivot_longer(cols = c("Mean", "Treatment", "Residual"),
names_to = "Component", values_to = "Value")
# ----- Weight -----
df_weight <- data.frame(
ID = ID,
Gender = Gender,
Mean = as.vector(weight_mean),
Treatment = as.vector(weight_treatment),
Residual = as.vector(weight_residual)
) %>%
pivot_longer(cols = c("Mean", "Treatment", "Residual"),
names_to = "Component", values_to = "Value")
# ----- Plot Snout -----
ggplot(df_snout, aes(x = ID, y = Value, fill = Component)) +
geom_bar(stat = "identity") +
ggtitle("Dekomponering af Snout (Length)") +
ylab("cm") +
theme_minimal()
# ----- Plot Weight -----
ggplot(df_weight, aes(x = ID, y = Value, fill = Component)) +
geom_bar(stat = "identity") +
ggtitle("Dekomponering af Weight") +
ylab("kg") +
theme_minimal()
library(tidyr)
library(dplyr)
library(ggplot2)
# ID og køn
ID <- paste0("Obs", 1:10)
Gender <- c(rep("Male", 5), rep("Female", 5))
ID <- factor(ID, levels = paste0("Obs", 1:10)) # RÌkkefølge
# ----- Snout -----
df_snout <- data.frame(
ID = ID,
Gender = Gender,
Mean = as.vector(snout_mean),
Treatment = as.vector(snout_treatment),
Residual = as.vector(snout_residual)
) %>%
pivot_longer(cols = c("Mean", "Treatment", "Residual"),
names_to = "Component", values_to = "Value") %>%
mutate(Component = factor(Component, levels = c("Mean", "Treatment", "Residual")))
# ----- Weight -----
df_weight <- data.frame(
ID = ID,
Gender = Gender,
Mean = as.vector(weight_mean),
Treatment = as.vector(weight_treatment),
Residual = as.vector(weight_residual)
) %>%
pivot_longer(cols = c("Mean", "Treatment", "Residual"),
names_to = "Component", values_to = "Value") %>%
mutate(Component = factor(Component, levels = c("Mean", "Treatment", "Residual")))
# ----- Plot Snout -----
ggplot(df_snout, aes(x = ID, y = Value, fill = Component)) +
geom_bar(stat = "identity") +
geom_text(aes(label = round(Value, 1)),
position = position_stack(vjust = 0.5),
size = 3, color = "black") +
scale_fill_manual(values = c("Mean" = "#F8766D", "Treatment" = "#619CFF", "Residual" = "#00BA38")) +
ggtitle("Dekomponering af Snout (Length)") +
ylab("cm") +
theme_minimal()
# ----- Plot Weight -----
ggplot(df_weight, aes(x = ID, y = Value, fill = Component)) +
geom_bar(stat = "identity") +
geom_text(aes(label = round(Value, 1)),
position = position_stack(vjust = 0.5),
size = 3, color = "black") +
scale_fill_manual(values = c("Mean" = "#F8766D", "Treatment" = "#619CFF", "Residual" = "#00BA38")) +
ggtitle("Dekomponering af Weight") +
ylab("kg") +
theme_minimal()
# Tilføj Gender label
gender <- c(rep("Female", 5), rep("Male", 5))
# Dataframes til snout
df_snout <- data.frame(
ID = factor(paste0("Obs", 1:10), levels = paste0("Obs", 1:10)),
Gender = gender,
Mean = as.vector(snout_mean),
Treatment = as.vector(snout_treatment),
Residual = as.vector(snout_residual)
) %>%
pivot_longer(cols = c("Mean", "Treatment", "Residual"),
names_to = "Component", values_to = "Value")
# Dataframes til weight
df_weight <- data.frame(
ID = factor(paste0("Obs", 1:10), levels = paste0("Obs", 1:10)),
Gender = gender,
Mean = as.vector(weight_mean),
Treatment = as.vector(weight_treatment),
Residual = as.vector(weight_residual)
) %>%
pivot_longer(cols = c("Mean", "Treatment", "Residual"),
names_to = "Component", values_to = "Value")
# ----- Plot Snout (med køn i farve) -----
ggplot(df_snout, aes(x = ID, y = Value, fill = Component)) +
geom_bar(stat = "identity") +
facet_wrap(~Gender, scales = "free_x") +
ggtitle("Dekomponering af Snout (Length) opdelt efter køn") +
ylab("cm") +
theme_minimal()
# ----- Plot Weight (med køn i farve) -----
ggplot(df_weight, aes(x = ID, y = Value, fill = Component)) +
geom_bar(stat = "identity") +
facet_wrap(~Gender, scales = "free_x") +
ggtitle("Dekomponering af Weight opdelt efter køn") +
ylab("kg") +
theme_minimal()
library(tidyr)
library(dplyr)
library(ggplot2)
# Tilføj Gender label
gender <- c(rep("Female", 5), rep("Male", 5))
ID <- factor(paste0("Obs", 1:10), levels = paste0("Obs", 1:10))
# ---- SNOUT ----
df_snout <- data.frame(
ID = ID,
Gender = gender,
Mean = as.vector(snout_mean),
Treatment = as.vector(snout_treatment),
Residual = as.vector(snout_residual)
) %>%
pivot_longer(cols = c("Mean", "Treatment", "Residual"),
names_to = "Component", values_to = "Value") %>%
mutate(Component = factor(Component, levels = c("Mean", "Treatment", "Residual")))
# ---- WEIGHT ----
df_weight <- data.frame(
ID = ID,
Gender = gender,
Mean = as.vector(weight_mean),
Treatment = as.vector(weight_treatment),
Residual = as.vector(weight_residual)
) %>%
pivot_longer(cols = c("Mean", "Treatment", "Residual"),
names_to = "Component", values_to = "Value") %>%
mutate(Component = factor(Component, levels = c("Mean", "Treatment", "Residual")))
# ---- Farver ----
custom_colors <- c("Mean" = "#F8766D", "Treatment" = "#619CFF", "Residual" = "#00BA38")
# ---- Plot SNOUT ----
ggplot(df_snout, aes(x = ID, y = Value, fill = Component)) +
geom_bar(stat = "identity") +
geom_text(aes(label = round(Value, 1)),
position = position_stack(vjust = 0.5),
size = 3, color = "black") +
facet_wrap(~Gender, scales = "free_x") +
scale_fill_manual(values = custom_colors) +
ggtitle("Dekomponering af Snout (Length) opdelt efter køn") +
ylab("cm") +
theme_minimal()
# ---- Plot WEIGHT ----
ggplot(df_weight, aes(x = ID, y = Value, fill = Component)) +
geom_bar(stat = "identity") +
geom_text(aes(label = round(Value, 1)),
position = position_stack(vjust = 0.5),
size = 3, color = "black") +
facet_wrap(~Gender, scales = "free_x") +
scale_fill_manual(values = custom_colors) +
ggtitle("Dekomponering af Weight opdelt efter køn") +
ylab("kg") +
theme_minimal()
(Hotellingâs T² test: Hâ: Îź_female = Îź_male)
4.1 Hotellingâs T² test * Hotellingâs T² test for forskel i middelvektor:
# UdtrĂŚk data for hver gruppe (kun numeriske kolonner)
Xf <- T6.19[T6.19$Gender == "Female", 1:2] # Snout og Weight
Xm <- T6.19[T6.19$Gender == "Male", 1:2]
# Gruppestørrelser
n1 <- nrow(Xf)
n2 <- nrow(Xm)
p <- ncol(Xf) # Antal variable
# GruppemiddelvĂŚrdier
xbar1 <- colMeans(Xf)
xbar2 <- colMeans(Xm)
# Pooled kovariansmatrix
S1 <- cov(Xf)
S2 <- cov(Xm)
Spooled <- ((n1 - 1)*S1 + (n2 - 1)*S2)/(n1 + n2 - 2)
# Hotellingâs T² teststørrelse
T2 <- (n1 * n2) / (n1 + n2) * t(xbar1 - xbar2) %*% solve(Spooled) %*% (xbar1 - xbar2)
# Kritisk vĂŚrdi og p-vĂŚrdi
F_crit <- ((n1 + n2 - 2) * p / (n1 + n2 - p - 1)) * qf(0.95, p, n1 + n2 - p - 1)
F_val <- ((n1 + n2 - p - 1) * T2) / ((n1 + n2 - 2) * p)
p_val <- 1 - pf(F_val, p, n1 + n2 - p - 1)
# Udskriv resultat
cat("Hotellingâs T² test:\n")
## Hotellingâs T² test:
cat("T² =", round(T2, 4), "\n")
## T² = 76.9153
cat("F =", round(F_val, 4), "\n")
## F = 37.7455
cat("p-vĂŚrdi =", format.pval(p_val, digits = 5), "\n")
## p-vĂŚrdi = 0.000000000064296
Hotellingâs T² test undersøger, om der er en samlet forskel i middelvektorerne for Snout Length og Weight mellem hunner og hanner.
Da p-vĂŚrdien er ekstremt lav (p ⪠0.05), forkaster vi nulhypotesen Hâ: Îź_female = Îź_male.
Konklusion: Der er en signifikant multivariat forskel pü lÌngde og vÌgt mellem de to køn.
4.2 Generaliseret stikprøvevarians (|S|)
(relevant for Hotellingâs T² og Mahalanobis-afstand)
# 1. Generaliseret stikprøvevarians for pooled matrix
Xf <- T6.19[T6.19$Gender == "Female", 1:2]
Xm <- T6.19[T6.19$Gender == "Male", 1:2]
n1 <- nrow(Xf); n2 <- nrow(Xm)
S1 <- cov(Xf); S2 <- cov(Xm)
Spooled <- ((n1 - 1)*S1 + (n2 - 1)*S2)/(n1 + n2 - 2)
# Determinant = generaliseret varians
gen_var <- det(Spooled)
cat("Generaliseret stikprøvevarians (|S|):", round(gen_var, 4))
## Generaliseret stikprøvevarians (|S|): 86170.38
Fortolkning
cat("\n|S| udtrykker den samlede multivariate variation og volumen i datasĂŚttet.\n",
"En høj vÌrdi indikerer stor spredning i büde lÌngde og vÌgt.\n")
##
## |S| udtrykker den samlede multivariate variation og volumen i datasĂŚttet.
## En høj vÌrdi indikerer stor spredning i büde lÌngde og vÌgt.
|S| udtrykker den samlede multivariate variation og volumen i datasÌttet. En høj vÌrdi indikerer stor spredning i büde lÌngde og vÌgt.
cat("Generaliseringen |S| = |D| â
|R| viser, at varians og korrelation sammen bestemmer spredningens volumen.\n")
## Generaliseringen |S| = |D| â
|R| viser, at varians og korrelation sammen bestemmer spredningens volumen.
cat("BemÌrk: Determinanten af Spooled-matrixen fungerer som en generaliseret stikprøvevarians.\n",
"En lav vÌrdi tyder pü høj korrelation mellem variablerne, mens en høj vÌrdi tyder pü større uafhÌngighed.\n")
## BemÌrk: Determinanten af Spooled-matrixen fungerer som en generaliseret stikprøvevarians.
## En lav vÌrdi tyder pü høj korrelation mellem variablerne, mens en høj vÌrdi tyder pü større uafhÌngighed.
|S| mĂĽler samlet spredning. Lav vĂŚrdi â høj korrelation. Høj vĂŚrdi â større uafhĂŚngighed.
4.3 Visualisering: Hotellingâs T²-ellipse
#Grafisk med ellipse pakken
library(ellipse)
##
## Attaching package: 'ellipse'
## The following object is masked from 'package:car':
##
## ellipse
## The following object is masked from 'package:graphics':
##
## pairs
# Brug Spooled og center i forskellen mellem køn
Xf <- T6.19[T6.19$Gender == "Female", 1:2]
Xm <- T6.19[T6.19$Gender == "Male", 1:2]
xbar1 <- colMeans(Xf)
xbar2 <- colMeans(Xm)
diff_mean <- xbar1 - xbar2
n1 <- nrow(Xf); n2 <- nrow(Xm)
Spooled <- ((n1 - 1)*cov(Xf) + (n2 - 1)*cov(Xm))/(n1 + n2 - 2)
n <- (n1 * n2) / (n1 + n2)
alpha <- 0.05
p <- 2
T2_crit <- ((n1 + n2 - 2) * p / (n1 + n2 - p - 1)) * qf(1 - alpha, p, n1 + n2 - p - 1)
# Plot
plot(ellipse(Spooled / n, centre = diff_mean, level = 1 - alpha,
t = sqrt(T2_crit)), type = "l", col = "darkblue",
xlab = "Snout Vent Length (cm)", ylab = "Weight (kg)",
main = "Hotellingâs T² Ellipse (Volumen â |S|)")
points(0, 0, pch = 4, col = "red")
text(0, 0, "0", pos = 1, cex = 0.8)
# Grafisk uden ellipse pakken
# Parametre
xbar1 <- colMeans(Xf)
xbar2 <- colMeans(Xm)
diff_mean <- xbar1 - xbar2
n <- (n1 * n2) / (n1 + n2)
alpha <- 0.05
p <- 2
T2_crit <- ((n1 + n2 - 2) * p / (n1 + n2 - p - 1)) * qf(1 - alpha, p, n1 + n2 - p - 1)
# GenerĂŠr ellipse uden ellipse-pakke
theta <- seq(0, 2*pi, length.out = 200)
circle <- cbind(cos(theta), sin(theta))
A <- chol(Spooled / n * T2_crit)
ellipse_coords <- t(diff_mean + t(circle %*% A))
# Plot
plot(ellipse_coords, type = "l", lwd = 2, col = "black",
xlab = "Snout Vent Length (cm)", ylab = "Weight (kg)",
main = "Hotellingâs T² Ellipse (uden ellipse-pakke)")
points(0, 0, col = "blue", pch = 19)
text(0, 0, "0", pos = 1)
points(diff_mean[1], diff_mean[2], pch = 19, col = "darkblue")
#text(diff_mean[1], diff_mean[2], "M1 - M2", pos = 4, col = "darkblue")
text(diff_mean[1], diff_mean[2], expression(mu[1] - mu[2]), pos = 4, col = "darkblue")
đ Punktet âM1 - M2â reprĂŚsenterer forskellen mellem de to gruppers stikprøvegennemsnit. Hvis dette punkt falder uden for T²-ellipsen, tyder det pĂĽ en signifikant forskel i middelvĂŚrdierne.
â Kort opsummering til posteren:
Den sorte ellipse reprÌsenterer omrüdet for ikke-signifikante differenser i middelvektorer mellem de to køn.
Da det blĂĽ punkt (Îźâ â Îźâ) falder uden for ellipsen, forkastes Hâ â der er en signifikant forskel.
4.4 Mannova (Simpel med wilks)
# Multivariat variansanalyse (MANOVA)
model_manova <- manova(cbind(`Snout Vent Length (cm)`, `Weight (kg)`) ~ Gender, data = T6.19)
# Samlet test med summary
summary(model_manova, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## Gender 1 0.41248 37.745 2 53 0.0000000000643 ***
## Residuals 54
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Kort forklaring til posteren (valgfrit)
Der blev udført en multivariat variansanalyse (MANOVA) for at teste, om der er en samlet forskel i lÌngde og vÌgt mellem hanner og hunner.
Wilksâ lambda-test viste en meget signifikant forskel (p < 2.2e-16), hvilket bekrĂŚfter resultaterne fra de univariate t-tests.
4.4b Mannova med Bartletts korrektion
library(dplyr)
# Data: responsvariabler og grupper
Y <- T6.19[, 1:2] # Snout & Weight
group <- T6.19$Gender
n1 <- sum(group == "Female")
n2 <- sum(group == "Male")
g <- 2
p <- ncol(Y)
n_total <- nrow(Y)
# Fit MANOVA-model
fit <- manova(as.matrix(Y) ~ group)
# Kort metode â direkte Wilks-test
summary(fit, test = "Wilks")
## Df Wilks approx F num Df den Df Pr(>F)
## group 1 0.41248 37.745 2 53 0.0000000000643 ***
## Residuals 54
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Lang metode â UdtrĂŚk SSCP-matricer
B <- summary(fit)$SS$group
W <- summary(fit)$SS$Residuals
T_mat <- B + W
# Frihedsgrader
df1 <- p * (g - 1) # for behandling
df2 <- p * (n_total - g) # for residualer
# Wilks' lambda
lambda <- det(W) / det(T_mat)
# Chi-i-anden Bartletts korrektion (Resultat 6.8 / 6.43)
m <- (n1 + n2 - 2) - (p + g) / 2
chi2_stat <- -m * log(lambda)
chi2_crit <- qchisq(0.95, df1)
pval_chi2 <- pchisq(chi2_stat, df1, lower.tail = FALSE)
# Udskriv resultater
cat("Wilksâ lambda: ", round(lambda, 4), "\n")
## Wilksâ lambda: 0.4125
cat("Chi²-test (Bartletts korrektion):\n")
## Chi²-test (Bartletts korrektion):
cat("Teststatistik:", round(chi2_stat, 3), "\n")
## Teststatistik: 46.049
cat("Kritisk vĂŚrdi (Ď², 95%, df =", df1, "):", round(chi2_crit, 3), "\n")
## Kritisk vĂŚrdi (Ď², 95%, df = 2 ): 5.991
cat("p-vĂŚrdi:", signif(pval_chi2, 4), "\n")
## p-vĂŚrdi: 0.0000000001001
cat("Konklusion: ", ifelse(pval_chi2 < 0.05, "Forkast Hâ â Køn pĂĽvirker mindst ĂŠn af variablerne.", "Forkast ikke Hâ â Ingen forskel."), "\n")
## Konklusion: Forkast Hâ â Køn pĂĽvirker mindst ĂŠn af variablerne.
đ MANOVA-resultat (ud fra Bartletts korrektion)
Wilksâ Îť = 0.4125
Ď² = 46.05, kritisk Ď² = 5.99, df = 2
p < 0.0000000001
â Konklusion: Forkast Hâ â køn har signifikant indflydelse pĂĽ mindst ĂŠn af variablerne (Snout Length eller Weight).
SAmlet konklusion pĂĽ MANNVA:
đ Kort fortolkning til posteren (tekst)
MANOVA: Multivariat variansanalyse
Undersøger om middelvektorerne for Snout Length og Weight er ens mellem hunner og hanner:
Hâ: Îź<sub>female</sub> = Îź<sub>male</sub>
Hâ: Îź<sub>female</sub> â Îź<sub>male</sub>
Resultat:
Wilksâ Îť = 0.4125
F(2, 53) = 37.745, p < 0.0000000001
Ď² = 46.05, kritisk Ď² = 5.99, df = 2
p < 0.0000000001
â
Begge tests fører til samme konklusion:
Forkast Hâ â køn pĂĽvirker signifikant mindst ĂŠn af variablerne (snout eller vĂŚgt eller begge).
# Boxâs M test for lighed i kovariansmatricer
# ForudsĂŚt: T6.19 har kolonner: Snout Vent Length (cm), Weight (kg), Gender
library(dplyr)
# Del op efter køn
Xf <- T6.19 %>%
filter(Gender == "Female") %>%
dplyr::select(`Snout Vent Length (cm)`, `Weight (kg)`)
Xm <- T6.19 %>%
filter(Gender == "Male") %>%
dplyr::select(`Snout Vent Length (cm)`, `Weight (kg)`)
n1 <- nrow(Xf)
n2 <- nrow(Xm)
p <- ncol(Xf)
g <- 2
n_total <- n1 + n2
# Kovariansmatricer
S1 <- cov(Xf)
S2 <- cov(Xm)
# Determinanter
detS1 <- det(S1)
detS2 <- det(S2)
# Spooled
Spooled <- ((n1 - 1) * S1 + (n2 - 1) * S2) / (n_total - g)
detSpooled <- det(Spooled)
# Box's M test statistik
M <- (n1 - 1) * log(detSpooled / detS1) + (n2 - 1) * log(detSpooled / detS2)
# Korrektionsfaktor
u <- ((1 / (n1 - 1)) + (1 / (n2 - 1)) - (1 / (n_total - g))) *
(2 * p^2 + 3 * p - 1) / (6 * (p + 1) * (g - 1))
C <- (1 - u) * M
df <- p * (p + 1) * (g - 1) / 2
pval <- pchisq(C, df, lower.tail = FALSE)
# Resultat
cat("Boxâs M test:\n")
## Boxâs M test:
cat("Teststatistik:", round(C, 3), "\n")
## Teststatistik: 100.325
cat("Frihedsgrader:", df, "\n")
## Frihedsgrader: 3
cat("p-vĂŚrdi:", signif(pval, 4), "\n")
## p-vĂŚrdi: 0.000000000000000000001323
cat("Konklusion: ", ifelse(pval < 0.05,
"Forkast Hâ â kovariansmatricerne er forskellige.",
"Forkast ikke Hâ â antag ens kovariansmatricer."), "\n")
## Konklusion: Forkast Hâ â kovariansmatricerne er forskellige.
đ 2. Fortolkning til posteren (Boxâs M-test)
Boxâs M-test undersøger, om varians-/kovariansstrukturen er ens mellem grupperne â her: hunner og hanner. Hypoteser:
Hâ: Kovariansmatricerne for de to køn er ens
Hâ: Mindst ĂŠn gruppe har en anden kovariansstruktur
Resultat:
Teststatistik: 100.325
Frihedsgrader: 3
p-vĂŚrdi: < 0.0000000000000000000014
â Konklusion: Da p-vĂŚrdien er langt under 0.05, forkastes Hâ. Der er signifikant forskel i kovariansmatricerne â dvs. variabiliteten i lĂŚngde og vĂŚgt varierer mellem hunner og hanner.
(Ellipse + Bonferroni CI)